About the analysis

This is an analysis for the impact of introducing the lunch program in 1955 the growth trend of the Japanese Boys

https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3398189

The data

The data source is Ministry of Internal Affairs and Communications Statistics Bureau.

www.e-stat.go.jp/SG1/estat/List.do?bid=000001014499&

NOTE: each column represent an age group

data <- read_xlsx("FEH_00400002_221209195404.xlsx")
data[data == "***"] <- NA
data <- clean_names(data)
datatable(data)

Cleaning the data

Changing the data to long format

data <- data %>% select(year, x17) %>% rename(height = x17) %>% mutate(height =  as.numeric(height)) %>% mutate(year = year - 1900)
datatable(data)

Analysis Steps

1. choose a maximum height (e.g. 200 cm)

max_height <- 175

2. divide all heights by the maximum

data$height_scaled <- data$height / max_height

3. calculate the logistic transform of the divided heights, log(y/(1-y))

data$logit_height <- log(data$height_scaled / (1 - data$height_scaled))

4. Model this data against time

model_all <- lm(logit_height ~ year, data = data)

5. back transform to get the predicted value at each year

data$predicted_all <- predict(model_all, newdata = data)
data$predicted_all <- exp(data$predicted_all)/(1+exp(data$predicted_all))*max_height
datatable(data)

5b. Creating a Plot

ggplot(data = data, aes(x = year, y = height, colour = "Oberved Heights")) +
  scale_color_manual(labels = c("Observed Heights", "Predicted Data"), values = c("blue", "red")) + geom_line() + geom_line(aes(y = predicted_all, colour = "Predicted Data"))  

6. Model the data only up to the beginning of the war

war_year <- 1940 - 1900
end_war_year <- 1948 - 1900
data_pre_war <- data %>% filter(year <= war_year)
fit_pre_war <- lm(logit_height ~ year, data = data_pre_war)

7. project this forward to now

data$predicted_pre_war <- predict(fit_pre_war, newdata = data)
data$predicted_pre_war <- exp(data$predicted_pre_war)/(1+exp(data$predicted_pre_war))*max_height
datatable(data)

7b. Plotting

ggplot(data = data, aes(x = year, y = height, colour = "Oberved Heights")) +
  scale_color_manual(labels = c("Observed Heights", "Predicted Data Full Model", "Pre War Model"), values = c("blue", "red", "black")) + geom_line() + geom_line(aes(y = predicted_all, colour = "Predicted Data"))  + geom_line(aes(y = predicted_pre_war, colour = "Pre War Projection")) 

8. Generate a predicted curve using only post-war data

data_post_war <- data %>% filter(year >= end_war_year)
fit_post_war <- lm(logit_height ~ I(year)^2 + year, data = data_post_war)

data$predicted_post_war <- predict(fit_post_war, newdata = data)
## Warning in predict.lm(fit_post_war, newdata = data): prediction from a
## rank-deficient fit may be misleading
data$predicted_post_war <- exp(data$predicted_post_war)/(1+exp(data$predicted_post_war))*max_height
datatable(data)

8b Fitting a direct model

data_post_war <- data %>% filter(year >= end_war_year)
fit_post_war_direct <- lm(logit_height ~ I(year)^2, data = data_post_war)

data$predicted_post_war_direct <- predict(fit_post_war_direct, newdata = data)
data$predicted_post_war_direct <- exp(data$predicted_post_war_direct)/(1+exp(data$predicted_post_war_direct))*max_height
datatable(data)
ggplot(data = data, aes(x = year, y = height, colour = "Oberved Heights")) +
  scale_color_manual(labels = c("Observed Heights", "Direct Fit", "Poly"), values = c("blue", "red", "black")) + 
  geom_line() +
  geom_line(aes(y = predicted_post_war_direct, colour = "Direct Fit")) + 
  geom_line(aes(y = predicted_post_war, colour = "Poly")) 

9. Plotting

full_plot <- ggplot(data = data, aes(x = year, y = height, colour = "Oberved Heights")) +
  scale_color_manual(labels = c("Observed Heights", "Predicted Data Full Model", "Pre War Model", "Post War Model Poly", "Post War Direct"), values = c("blue", "red", "black", "green", "grey")) + 
  geom_line() + 
  geom_line(aes(y = predicted_all, colour = "Predicted Data"))  + 
  geom_line(aes(y = predicted_pre_war, colour = "Pre War Projection")) + 
  geom_line(aes(y = predicted_post_war, colour = "Post War Model Poly")) +
    geom_line(aes(y = predicted_post_war, colour = "Post War Direct")) 

ggplotly(full_plot)